
clear all

// Builds county-level Representation Indices from county-district-year data.

// Load the county-sldl-year file and park it in a tempfile so that the
// county education shares can be merged onto it below.
use "$OutputPath/county_sldl_pop_elec", clear

tempfile base
save `base', replace

// Education-based Representation Index (robustness checks): compute each
// county-year's bachelor's share from the NHGIS county file.
use "$OutputPath/nhgis_county", clear

generate pbachelors = bachelors/population
keep year fstate fcounty pbachelors

// One NHGIS county-year row fans out to many county-district-year rows.
// keep(match using) discards county-years that appear only in the NHGIS
// file; rows that appear only in the county-district-year file are kept.
merge 1:m year fstate fcounty using `base', keep(match using) nogenerate

save `base', replace

// Compute district-level and county-level characteristics

rename (population) (pop)
// Bachelor's share times population. The full variable name is spelled out
// so the line does not depend on Stata's variable-abbreviation setting.
gen edu=pbachelors*pop

// d_* = total over all rows sharing (year, fstate, sldl);
// c_* = total over all rows sharing (year, fstate, fcounty).
// The -missing- option returns missing (not 0) when every input in the group is missing.
foreach i in pop vap cvap edu {
	egen d_`i'=total(`i'), by(year fstate sldl) missing
	egen c_`i'=total(`i'), by(year fstate fcounty) missing
}

//corrections to nmembers in VT 2002 -- used http://www.leg.state.vt.us/reports/02Redistricting/Act151_House_Statistics.pdf (see nmember_corrections.xls). District names are inconsistent across years -- but can use population to determine 1 or 2 members. if 1 member, d_vap ~3000 if 2 members, d_vap~ 6,000
// Stata orders missing values above every number, so an unguarded d_vap>5000
// is also true when d_vap is missing; the !missing() guard leaves nmember
// untouched for Vermont districts that have no VAP total.
// NOTE(review): the comment above refers to VT 2002, but the rule is applied to
// every Vermont row regardless of year -- confirm that this is intended.
replace nmember=2 if d_vap>5000 & !missing(d_vap) & state=="Vermont"
replace nmember=1 if d_vap<=5000 & state=="Vermont"

	//verify nmember by checking that standard deviation of nmember / d_vap is small within a stateyear
	/*
	gen nmember_d=nmember/d_vap
	egen test = sd(nmember_d), by(state year)
	sum test //all very small.
	*/ 

// County-versus-district overlap measures

// Row's VAP as a share of its district total and of its county total.
generate pdistrictinc = vap/d_vap
generate pcountyind   = vap/c_vap

// Neither share may exceed one; rows with a missing share are skipped.
foreach share in pcountyind pdistrictinc {
	assert `share'<=1 if `share'!=.
}

// Relative Representation Index (weighted) as in AGS 2002 -- summed in the county collapse
generate RRI = (nmember/d_pop)*(pcountyind)

// Squared share of county population in this row -- summed in the county collapse
generate fragmentation = ((pop)/c_pop)^2

// Counters that are summed in the county collapse:
//   ndistricts  one per county-district row
//   nwholerep   members of districts whose VAP lies entirely in this row
//   nsharedrep  members of districts only partly (but not zero) in this row
generate ndistricts = 1
generate nwholerep  = cond(pdistrictinc==1, nmember, 0)
generate nsharedrep = cond(pdistrictinc<1 & pdistrictinc>0, nmember, 0)

// Number of other counties in the same district: count the rows with a
// nonmissing fcounty in the district-year, then subtract this row itself.
egen neighbors = count(fcounty), by(year state sldl)
replace neighbors = neighbors - 1

// Map election-year state-house vote totals (g<year>_sth_<party>v) onto fiscal years.
// NOTE(review): the state exceptions are not symmetric across fiscal years
// (Texas has no override for 2012; Louisiana/New Jersey have none for 2017) --
// confirm this reflects data availability rather than an omission.
foreach party in d r {
	// fiscal year 2007
	generate turnout_`party' = g2004_sth_`party'v if year==2007
	replace  turnout_`party' = g2003_sth_`party'v if year==2007 & inlist(state, "Louisiana", "New Jersey")
	replace  turnout_`party' = g2002_sth_`party'v if year==2007 & state=="Texas"

	// fiscal year 2012
	replace  turnout_`party' = g2010_sth_`party'v if year==2012
	replace  turnout_`party' = g2007_sth_`party'v if year==2012 & inlist(state, "Louisiana", "New Jersey")

	// fiscal year 2017
	replace  turnout_`party' = g2014_sth_`party'v if year==2017
	replace  turnout_`party' = g2012_sth_`party'v if year==2017 & state=="Texas"
}

// Two-party totals and shares for each county-district row
generate turnout    = turnout_d + turnout_r
generate dvoteshare = turnout_d/turnout
generate rvoteshare = turnout_r/turnout

// County-year totals and Democratic two-party share
egen c_turnout   = total(turnout),   by(fstate fcounty year) missing
egen c_turnout_d = total(turnout_d), by(fstate fcounty year) missing
generate c_dvoteshare = c_turnout_d/c_turnout

// District-year totals by party, their sum, and the Democratic two-party share
egen d_turnout_d = total(turnout_d), by(fstate sldl year) missing
egen d_turnout_r = total(turnout_r), by(fstate sldl year) missing
generate d_turnout    = d_turnout_d + d_turnout_r
generate d_dvoteshare = d_turnout_d/d_turnout

// State-year totals by party and their sum
egen s_turnout_d = total(turnout_d), by(year fstate) missing
egen s_turnout_r = total(turnout_r), by(year fstate) missing
generate s_turnout = s_turnout_d + s_turnout_r



//county-level std. dev. in D two-party vote share over study period
// dvotesd is computed within (fstate, fcounty), i.e. across all years of a county.
egen dvotesd=sd(c_dvoteshare), by(fstate fcounty) 
// District-year sum of dvotesd over the rows in the district.
egen d_dvotesd=total(dvotesd), by(year fstate sldl) missing
// District-year sum of cvap*dvotesd. Grouped by district-year like the other
// d_* totals; without by() egen would return one grand total for the whole dataset.
egen d_cvap_swing=total(cvap*dvotesd), by(year fstate sldl) missing

// Compute district-level election outcomes: uncontested, winner, decisiveness, swingness, and closeness
// (The header uses //, not ///: a leading /// is Stata's line-continuation
// marker and would splice the following command onto the comment line.)
gen uncontested=(d_turnout_d==0 | d_turnout_r==0)  
	* uncontested elections have 0 turnout in some states or missing in other states
// NOTE(review): only zero party totals are flagged here; a district whose party
// total is missing is coded 0 unless the whole state-year is missing (next line).
// Confirm that this matches the intent of the comment above.

replace uncontested=. if s_turnout==. 

// Democrat wins, winning votes
gen dwins=(d_turnout_d>d_turnout_r)
// Leave the winner undefined when either party's district total is missing.
// Stata treats missing as larger than any number, so the comparison above is
// not meaningful in that case (a missing Republican total would yield dwins=0).
replace dwins=. if missing(d_turnout_d) | missing(d_turnout_r)

// Votes cast in this row for the district's winning party, and their district total.
gen winning= dwins*turnout_d + (1-dwins)*turnout_r
egen d_winning=total(winning), by(year fstate sldl) missing

// compute state-level sigma before summing across districts
// sldlflag tags one row per (fstate, sldl, year) so each district enters the
// state-year totals below exactly once.
egen sldlflag=tag(fstate sldl year) 
egen sigma=total(sldlflag*(d_dvoteshare*(1-d_dvoteshare))), by(fstate year) missing
egen delta_not=total(sldlflag*(d_dvoteshare-(1-d_dvoteshare))), by(fstate year) missing
gen  delta=sigma+(d_dvoteshare-(1-d_dvoteshare))*delta_not/4
// delta scaled by this row's share of the district's VAP.
gen  deltaavg=delta*pdistrictinc

// swing equals 1 when the two parties tie in the row and falls toward 1/2 as
// the absolute margin approaches the whole two-party vote.
gen swing=1/(1+abs(turnout_d-turnout_r)/turnout)
egen d_swing=total(1/(1+abs(turnout_d-turnout_r)/turnout)), by(year fstate sldl) missing

// Indicator for a two-party margin under 30 percent of the row's two-party vote;
// set to missing when the row has no turnout figure.
gen close_30=(abs((turnout_d-turnout_r)/turnout)<0.30)
replace close_30=. if turnout==.


// psi and total-share-of-votes (TSV) measures; all are summed in the county collapse
foreach w in cvap {
	// psi: members times the share of the district's `w' lying outside this row
	generate psi_`w' = nmember*(1-(`w'/d_`w'))
	// tiny negatives (about -1e-7) arise from floating-point precision; floor at zero
	replace  psi_`w' = 0 if psi_`w'<0

	// same construction with the row's share scaled by delta
	generate psi_`w'_delta = nmember*(1-delta*(`w'/d_`w'))
	replace  psi_`w'_delta = 0 if psi_`w'_delta<0

	// TSV: members times the row's share of the district's `w'
	generate TSV_`w' = nmember*`w'/d_`w'
	// NOTE(review): the denominator is d_`w' times the district sum of dvotesd,
	// not the district sum of `w'*dvotesd -- confirm which weighting is intended.
	generate TSV_`w'_swing = nmember*(`w'*dvotesd)/(d_`w'*d_dvotesd)
	// TSV multiplied by delta (decisiveness)
	generate TSV_`w'_delta = TSV_`w'*delta
}

// Plain TSV for the remaining weighting variables
foreach w in turnout edu winning {
	generate TSV_`w' = nmember*`w'/d_`w'
}

// District-year collapse used to derive legislature-level statistics

// Snapshot the county-district-year rows; they are merged back on below.
save `base', replace

// One row per district-year: means of the outcome variables plus the summed
// population, stored directly under its district-level name.
collapse (mean) uncontested demwon repwon indwon totaldistricts nmember ndistricts ///
	(sum) district_population=pop (first) state, by(year fstate sldl)

// Chamber-wide seat totals by party (missing when every district is missing)
foreach p in dem rep {
	egen `p'sinhouse = total(`p'won), by(year fstate) missing
}

// 1 if Democrats hold more seats than Republicans, 0 otherwise,
// and missing when either seat total is missing.
generate demmajority = cond(missing(demsinhouse) | missing(repsinhouse), ., demsinhouse>repsinhouse)

// These three are re-supplied at row level by the merge below.
drop nmember ndistricts state

// Attach the district-year statistics back onto the county-district-year rows
merge 1:m year fstate sldl using `base', nogenerate

// Collapse to county level

// Variables averaged across a county's rows
local meanvars totaldistricts dvotesd demmajority

// Variables summed across a county's rows; reset to missing afterwards when
// every contributing row was missing
local sumvars TSV_* psi* ndistricts nmember nwholerep nsharedrep neighbors ///
	RRI fragmentation urban cvap

* use ACS county-level total population data rather than measures aggregated from block-level Census data
// not kept: rural whitenh hispanic white black male female vap

// Variables summed WITHOUT the all-missing reset: turnout data are missing for
// uncontested races, and those counties should stay in the sample rather than
// be set to missing
local sumvars_nomiss close_30 turnout* demwon repwon indwon incumbentwon uncontested

// Row-level missing flags; their county mean equals 1 only when every row is missing
foreach v of varlist `meanvars' `sumvars' {
	generate allmiss_`v' = (`v'==.)
}

collapse (sum) `sumvars' `sumvars_nomiss' ///
	(mean) `meanvars' allmiss* ///
	(first) state, by(year fstate fcounty)

// Restore missing where the collapse aggregated nothing but missing values
foreach v of varlist `meanvars' `sumvars' {
	replace `v' = . if allmiss_`v'==1
}

drop allmiss*

// County has at least one close (margin under 30 percent) row
generate anyclose_30 = close_30>0
replace anyclose_30 = . if close_30==.
drop close_30


cd "$OutputPath/"
save county_pop_elec, replace

